import pathlib
from pathlib import Path

# Root directory that is searched (recursively) for text files.
data_dir = '/Users/avinash.v/Projects/indix/qa/data/domain_data/'
data_dir_path = pathlib.Path(data_dir)

# Materialize every path under the root, at any depth, whose name matches
# ``*.txt``. If nothing matches, the result is simply an empty list.
list_of_text_files = [*data_dir_path.glob('**/*.txt')]

# Report how many matching paths were collected.
print(len(list_of_text_files))


# TODO: expose list_of_text_files as an environment variable so it can be used by the create_pretraining data script